"""
analysis after one second whether there are duplicate in the crawler data
"""
import json
from collections import Counter

import simplejson

def duplicateanalysis(flist):
    """
    Print, for every ordered pair of distinct files, how many item ids they share.

    Each file in *flist* must contain a JSON array of objects that each
    have an ``id`` field.  For every ordered pair (f1, f2) with f1 != f2
    one line ``f1:f2:count`` is printed, where *count* counts matching id
    pairs with multiplicity (the sum over ids of occurrences-in-f1 times
    occurrences-in-f2) — identical to the original nested-loop comparison.

    @param flist: list of paths of the JSON files to check
    """
    # Per-file id -> occurrence-count map.  Counter turns the overlap
    # computation into O(distinct ids) per pair instead of the original
    # O(len1 * len2) double loop.
    idcounts = {}
    for fname in flist:
        # `with open(...)` replaces the old `file(fname)` call, which was
        # both a Python-2-only builtin and a file-handle leak (never closed).
        with open(fname) as fh:
            items = json.load(fh)
        idcounts[fname] = Counter(item['id'] for item in items)
    for k1 in idcounts:
        for k2 in idcounts:
            if k1 == k2:
                continue
            c1 = idcounts[k1]
            c2 = idcounts[k2]
            # Missing keys in c2 contribute 0 (Counter returns 0, no KeyError).
            samecount = sum(cnt * c2[wbid] for wbid, cnt in c1.items())
            print(k1 + ":" + k2 + ":" + str(samecount))



if __name__ == "__main__":
    flist = [
    "2011_04_02_12_38_42",
    "2011_04_02_12_38_40",
    "2011_04_02_12_38_39"]
    duplicateanalysis(flist)
